R语言绘制提琴图示例
前篇简介了R语言绘制箱线图,今天继续介绍提琴图。提琴图可以认为是箱线图的拓展类型吧,在箱线图的基础上增添了密度信息,与常规的箱线图相比,提琴图能够方便直观地观测数值的密度分布范围。
同样地,本篇通过分享R语言绘制提琴图的几种常见类型的简单示例,以帮助入门的同学们掌握常规的作图技巧。
作图数据的网盘链接(提取码pmbr):
https://pan.baidu.com/s/1WZ2UzXPGw9M448v-fms6RA
网盘附件“alpha.csv”为某16S细菌群落测序所获得的部分alpha多样性指数数据。其第一列为样本名称;observed_species、shannon、chao1和PD_whole_tree为四种类型的alpha多样性指数,其中的数值代表了各样本中该alpha多样性指数的数值;group1和group2为样本所对应的分组信息。
这和前文箱线图的示例作图数据是一样的。本篇我们将更换为绘制提琴图,展示alpha多样性指数在各分组中的分布概况。
R语言绘制提琴图的简单示例
我们首先将作图数据读到R中,并分别从中挑选部分数据,便于后续绘制不同的样式。
#读取数据library(reshape2)
alpha <- read.csv('alpha.csv', stringsAsFactors = FALSE)
alpha$group2 <- factor(alpha$group2)
alpha1 <- melt(alpha, id = c('samples', 'group1', 'group2'))
alpha2 <- subset(alpha1, variable == 'chao1')
alpha3 <- subset(alpha2, group1 == 'c')
vioplot()提琴图
vioplot包中的vioplot()命令可用于绘制提琴图。我们加载vioplot包使用vioplot()绘制提琴图展示chao1指数在c分组中的分布,如下示例。
##vioplot() 提琴图,详情 ?vioplot 查看帮助library(vioplot)
c1 <- subset(alpha3, group2 == '1')$value
c2 <- subset(alpha3, group2 == '2')$value
c3 <- subset(alpha3, group2 == '3')$value
c4 <- subset(alpha3, group2 == '4')$value
c5 <- subset(alpha3, group2 == '5')$value
#pdf('vioplot.pdf', width = 6.5, height = 5)
png('vioplot.png', width = 2000, height = 1500, res = 300, units = 'px')
vioplot(c1, c2, c3, c4, c5, col = '#f8766d')
title(ylab = 'Chao1 (group c)')
dev.off()
ggplot2绘制提琴图
同样地,ggplot2作图才是本篇要重点介绍的,归因于它做图更加灵活、功能更为强大。以下继续展示使用ggplot2绘制提琴图的示例。
library(ggplot2)
#单变量提琴图
p <- ggplot(alpha2, aes(x = group2, y = value, fill = group1)) +
geom_violin(position = position_dodge(width = 1), scale = 'width') +
theme(panel.grid = element_blank(), panel.background = element_rect(fill = 'transparent', color = 'black'), legend.title = element_blank(), legend.key = element_blank()) +
labs(x = '', y = 'Chao1')
#ggsave('ggplot2.plot.pdf', p, width = 6, height = 4)
ggsave('ggplot2.plot.png', p, width = 6, height = 4)
p <- ggplot(alpha2, aes(x = group2, y = value, fill = group1)) +
geom_violin(position = position_dodge(width = 1), scale = 'width') +
geom_jitter(aes(color = group1)) +
scale_color_manual(values = c('red', 'seagreen3')) +
theme(panel.grid = element_blank(), panel.background = element_rect(fill = 'transparent', color = 'black'), legend.position = 'none') +
labs(x = '', y = 'Chao1 (group c)')
#ggsave('ggplot2.plot.pdf', p, width = 6, height = 4)
ggsave('ggplot2.plot.png', p, width = 6, height = 4)
p <- ggplot(alpha2, aes(x = group2, y = value, fill = group1)) +
geom_violin(position = position_dodge(width = 1), scale = 'width') +
geom_boxplot(position = position_dodge(width = 1), outlier.size = 0.7, width = 0.2, show.legend = FALSE) +
theme(panel.grid = element_blank(), panel.background = element_rect(fill = 'transparent', color = 'black'), legend.title = element_blank(), legend.key = element_blank()) +
labs(x = '', y = 'Chao1')
#ggsave('ggplot2.plot.pdf', p, width = 6, height = 4)
ggsave('ggplot2.plot.png', p, width = 6, height = 4)
p <- ggplot(alpha1, aes(x = group2, y = value, fill = group1)) +
geom_violin(position = position_dodge(width = 1), scale = 'width') +
geom_boxplot(position = position_dodge(width = 1), outlier.size = 0.7, width = 0.2, show.legend = FALSE) +
facet_wrap(~variable, 2, scales = 'free') +
labs(x = '', y = '') +
theme(panel.grid = element_blank(), panel.background = element_rect(fill = 'transparent', color = 'black'), legend.title = element_blank(), legend.key = element_blank())
#ggsave('ggplot2.plot.pdf', p, width = 7, height = 5)
ggsave('ggplot2.plot.png', p, width = 7, height = 5)
#先绘制提琴图主体
p <- ggplot(data = alpha2, aes(x = group2, y = value, fill = group1)) +
geom_violin(position = position_dodge(width = 1), scale = 'width') +
geom_boxplot(position = position_dodge(width = 1), outlier.size = 0.7, width = 0.2, show.legend = FALSE) +
theme(panel.grid = element_blank(), panel.background = element_rect(fill = 'transparent', color = 'black'), legend.title = element_blank(), legend.key = element_blank()) +
labs(x = '', y = 'Chao1')
#再手动添加显著性标记
#注意,这里的显著性是提前已经计算好的,我们通过手动输入进来
#本篇只关注作图,不涉及统计分析
library(doBy)
alpha2_stat <- summaryBy(value~group2, alpha2, FUN = max)
names(alpha2_stat) <- c('group2', 'value')
alpha2_stat$group1 <- NA
alpha2_stat$sig <- rep('***', 5)
p <- p +
geom_text(data = alpha2_stat, aes(label = sig), vjust = -0.3) +
annotate('text', x = alpha2_stat$group2, y = alpha2_stat$value, label = '———', vjust = -0.3)
#ggsave('ggplot2.plot.pdf', p, width = 6, height = 4)
ggsave('ggplot2.plot.png', p, width = 6, height = 4)
#先绘制提琴图主体
p <- ggplot(data = alpha1, aes(x = group2, y = value, fill = group1)) +
geom_violin(position = position_dodge(width = 1), scale = 'width') +
geom_boxplot(position = position_dodge(width = 1), outlier.size = 0.7, width = 0.2, show.legend = FALSE) +
facet_wrap(~variable, 2, scales = 'free') +
theme(panel.grid = element_blank(), panel.background = element_rect(fill = 'transparent', color = 'black'), legend.title = element_blank(), legend.key = element_blank()) +
labs(x = '', y = 'Chao1')
#再手动添加显著性标记
#同上所述,这里的显著性是提前通过差异分析已经计算好的,我们通过手动输入进来
alpha1_stat <- summaryBy(value~group1+group2+variable, alpha1, FUN = max)
names(alpha1_stat) <- c('group1', 'group2', 'variable', 'value')
alpha1_stat$sig <- c('a', 'a', 'a', 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'a', 'b', 'b', 'a', 'b', 'c', 'c', 'b', 'a', 'a', 'b', 'a', 'a', 'a', 'b', 'a', 'a', 'a', 'a', 'a')
p <- p +
geom_text(data = alpha1_stat, aes(label = sig, color = group1), position = position_dodge(1), vjust = -0.3)
#ggsave('ggplot2.plot.pdf', p, width = 8, height = 6)
ggsave('ggplot2.plot.png', p, width = 8, height = 6)
由于提琴图和箱线图均属于同一类型的统计图,所以本篇的内容和前篇箱线图的内容大致一致。常见的方法就先简介到这里了。
R包rcompanion执行非参数双因素方差分析(Scheirer-Ray-Hare检验)
R语言执行非参数单因素方差分析(Kruskal-Wallis检验、Friedman检验)